# Uploading the files and creating subsets
fp <- file.path(getwd(), "..", "data", "cultures", "microbes", "viable_counts.xlsx")
df <- read.xlsx(fp, sheet = 1) # import in R the dataset (excel file) as dataframe
# let's transform the first variable in factor (useful for data exploration by groups)
df <- df %>% mutate_at(vars(sample, replicate),
as.factor)
# let's remove the repl. 1 because considered outlier from NGS analysis
df <- subset(df, replicate != "1")
df <- df[-2] # let's remove the "replicate" column
We analyzed 12 samples:
RM = raw milk
RM.H = raw milk heat treated (54°C for 54min)
eRM = enriched raw milk (spontaneous fermentation at 10°C for 21d)
eRM.H = enriched heated raw milk
eRM.HS = enriched heated, salted raw milk (5% v/w NaCl)
NWC = natural whey culture (thermophilic)
eRWC.y = enriched raw milk whey culture, young. Mix NWC-eRM (1:10), incubated at 38°C for 6h
eRWC.o = old, incubated at 38°C for 22h
eRWC.H.y = mix NWC-eRM.H (1:10), young
eRWC.H.o = mix NWC-eRM.H (1:10), old
eRWC.HS.y = mix NWC-eRM.HS (1:10), young
eRWC.HS.o = mix NWC-eRM.HS (1:10), old
Here is a summary statistic of the results divided for each sample:
dstats <- function(x)sapply(x, mystats)
dfstats <- by(df[2:9],
list(sample = df$sample), dstats)
dfstats
## sample: eRM
## Aerobic.mesophile Streptococci Lactobacilli FH.lactobacilli Enterococci
## n 3.0 3.0 3.0 3.0 3.0
## mean 9.2 9.1 9.1 3.6 6.8
## stdev 0.0 0.2 0.2 0.6 1.3
## max 9.2 9.3 9.3 4.0 8.0
## min 9.2 8.9 8.9 2.9 5.4
## Yeasts.and.molds Staphylococci Enterobacteriaceae
## n 3.0 3.0 3.0
## mean 4.6 6.8 6.5
## stdev 0.4 0.5 0.8
## max 5.1 7.3 7.4
## min 4.3 6.3 5.8
## ------------------------------------------------------------
## sample: eRM.H
## Aerobic.mesophile Streptococci Lactobacilli FH.lactobacilli Enterococci
## n 3.0 3.0 3.0 3.0 3.0
## mean 7.9 7.7 7.6 3.1 7.0
## stdev 0.4 0.6 1.2 0.7 0.7
## max 8.3 8.1 8.9 3.8 7.7
## min 7.5 7.0 6.5 2.6 6.4
## Yeasts.and.molds Staphylococci Enterobacteriaceae
## n 3.0 3.0 3.0
## mean 3.8 6.6 6.8
## stdev 1.4 1.0 1.0
## max 5.4 7.7 7.4
## min 2.8 5.8 5.7
## ------------------------------------------------------------
## sample: eRM.HS
## Aerobic.mesophile Streptococci Lactobacilli FH.lactobacilli Enterococci
## n 3.0 3.0 3.0 3.0 3.0
## mean 6.6 6.1 4.5 0.4 4.7
## stdev 0.4 1.2 0.8 0.6 0.7
## max 6.8 7.0 5.4 1.1 5.4
## min 6.2 4.8 4.1 0.0 4.3
## Yeasts.and.molds Staphylococci Enterobacteriaceae
## n 3.0 3.0 3.0
## mean 1.3 6.0 3.7
## stdev 1.5 1.1 0.4
## max 2.9 6.7 4.2
## min 0.0 4.7 3.4
## ------------------------------------------------------------
## sample: eRWC.H.o
## Aerobic.mesophile Streptococci Lactobacilli FH.lactobacilli Enterococci
## n 3.0 3.0 3.0 3.0 3.0
## mean 7.2 7.2 7.1 1.8 6.1
## stdev 0.7 0.8 0.8 0.7 0.3
## max 7.9 7.9 7.8 2.3 6.4
## min 6.5 6.4 6.3 1.0 5.8
## Yeasts.and.molds Staphylococci Enterobacteriaceae
## n 3.0 3.0 3.0
## mean 5.7 3.3 3.6
## stdev 0.3 1.9 2.3
## max 5.9 5.4 5.0
## min 5.4 1.5 1.0
## ------------------------------------------------------------
## sample: eRWC.H.y
## Aerobic.mesophile Streptococci Lactobacilli FH.lactobacilli Enterococci
## n 3.0 3.0 3.0 3.0 3.0
## mean 8.3 7.7 7.8 2.3 5.9
## stdev 0.8 1.1 0.9 1.2 0.9
## max 8.8 8.9 8.9 3.0 6.7
## min 7.4 6.8 7.3 0.9 4.8
## Yeasts.and.molds Staphylococci Enterobacteriaceae
## n 3.0 3.0 3.0
## mean 4.4 5.5 4.9
## stdev 1.1 1.2 1.2
## max 5.2 6.6 5.7
## min 3.2 4.3 3.5
## ------------------------------------------------------------
## sample: eRWC.HS.o
## Aerobic.mesophile Streptococci Lactobacilli FH.lactobacilli Enterococci
## n 3.0 3.0 3.0 3.0 3.0
## mean 7.2 7.0 7.2 1.1 4.1
## stdev 0.7 0.5 1.1 1.2 1.9
## max 7.9 7.6 8.0 2.3 5.4
## min 6.5 6.7 6.0 0.0 2.0
## Yeasts.and.molds Staphylococci Enterobacteriaceae
## n 3.0 3.0 3.0
## mean 4.3 1.5 2.1
## stdev 2.1 1.1 1.9
## max 6.0 2.7 4.3
## min 2.0 0.5 1.0
## ------------------------------------------------------------
## sample: eRWC.HS.y
## Aerobic.mesophile Streptococci Lactobacilli FH.lactobacilli Enterococci
## n 3.0 3.0 3.0 3.0 3.0
## mean 8.4 7.7 7.1 1.2 3.3
## stdev 1.0 1.4 1.8 1.6 0.3
## max 9.3 9.3 9.2 3.0 3.7
## min 7.3 6.8 5.9 0.0 3.0
## Yeasts.and.molds Staphylococci Enterobacteriaceae
## n 3.0 3.0 3.0
## mean 2.6 3.0 1.6
## stdev 2.3 0.4 1.4
## max 4.6 3.4 2.4
## min 0.0 2.5 0.0
## ------------------------------------------------------------
## sample: eRWC.o
## Aerobic.mesophile Streptococci Lactobacilli FH.lactobacilli Enterococci
## n 3.0 3.0 3.0 3.0 3.0
## mean 7.3 6.5 7.4 1.7 5.5
## stdev 1.0 0.2 1.2 0.5 0.8
## max 8.4 6.8 8.3 2.2 6.2
## min 6.5 6.3 6.0 1.1 4.6
## Yeasts.and.molds Staphylococci Enterobacteriaceae
## n 3.0 3.0 3.0
## mean 5.7 3.2 4.8
## stdev 1.2 1.3 1.8
## max 6.5 4.7 6.6
## min 4.3 2.3 3.0
## ------------------------------------------------------------
## sample: eRWC.y
## Aerobic.mesophile Streptococci Lactobacilli FH.lactobacilli Enterococci
## n 3.0 3.0 3.0 3.0 3.0
## mean 8.0 7.5 7.9 2.1 5.8
## stdev 0.7 0.7 0.2 0.4 1.9
## max 8.6 8.0 8.1 2.4 7.7
## min 7.3 6.8 7.7 1.6 4.0
## Yeasts.and.molds Staphylococci Enterobacteriaceae
## n 3.0 3.0 3.0
## mean 4.1 4.0 3.2
## stdev 0.8 0.6 1.1
## max 4.7 4.7 4.4
## min 3.2 3.7 2.2
## ------------------------------------------------------------
## sample: NWC
## Aerobic.mesophile Streptococci Lactobacilli FH.lactobacilli Enterococci
## n 3.0 3.0 3.0 3.0 3.0
## mean 8.5 7.8 7.7 2.3 2.8
## stdev 0.7 1.1 1.3 1.2 0.6
## max 9.1 9.1 9.1 3.0 3.3
## min 7.7 7.0 6.4 1.0 2.1
## Yeasts.and.molds Staphylococci Enterobacteriaceae
## n 3.0 3.0 3.0
## mean 1.2 2.3 0.4
## stdev 2.0 0.4 0.7
## max 3.5 2.8 1.3
## min 0.0 2.0 0.0
## ------------------------------------------------------------
## sample: RM
## Aerobic.mesophile Streptococci Lactobacilli FH.lactobacilli Enterococci
## n 3.0 3.0 3.0 3.0 3.0
## mean 4.9 4.3 3.1 1.0 2.3
## stdev 0.9 0.9 0.6 0.1 0.3
## max 5.4 5.3 3.7 1.1 2.7
## min 3.9 3.5 2.4 0.8 2.0
## Yeasts.and.molds Staphylococci Enterobacteriaceae
## n 3.0 3.0 3.0
## mean 1.1 4.3 1.7
## stdev 0.3 0.3 0.3
## max 1.3 4.6 1.9
## min 0.8 4.1 1.3
## ------------------------------------------------------------
## sample: RM.H
## Aerobic.mesophile Streptococci Lactobacilli FH.lactobacilli Enterococci
## n 3.0 3.0 3.0 3 3.0
## mean 2.1 1.4 0.8 0 0.7
## stdev 0.2 0.2 0.3 0 0.3
## max 2.4 1.7 1.2 0 1.0
## min 1.9 1.2 0.5 0 0.4
## Yeasts.and.molds Staphylococci Enterobacteriaceae
## n 3 3 3.0
## mean 0 1 0.3
## stdev 0 0 0.3
## max 0 1 0.5
## min 0 1 0.0
This boxplot shows the spontaneous fermentation effect of RM on the concentration of different microbial groups.
# for boxplots, the dataset in melted form is preferred
df2 <- melt(df, id = "sample", value.name = "CFU_mL")
Statistical analysis was performed in order to define whether the evaluated differences were significant. If the variances of the two groups resulted homogeneous to the Fisher’s test, a Two Sample t-test was applied; alternatively a Welch t-test was used.
# variance homogeneity (for interpretation http://www.sthda.com/english/wiki/f-test-compare-two-variances-in-r)
# If p-value>0.05 variances are homogeneous
sample_names <- c("RM", "eRM")
subdf2 <- filter(df2, sample %in% sample_names)
m_vars = unique(subdf2$variable)
for (x in m_vars) {
test_two_grps(x, subdf2, sample_names)
}
## --------------------------------------------------------------------------------
## Aerobic.mesophile
##
## F test to compare two variances
##
## data: Aerobic.mesophile RM eRM
## F = 1259.2, num df = 2, denom df = 2, p-value = 0.001587
## alternative hypothesis: true ratio of variances is not equal to 1
## 95 percent confidence interval:
## 32.28679 49108.21118
## sample estimates:
## ratio of variances
## 1259.185
## ------------------------------------------------------------------
##
## Welch Two Sample t-test
##
## data: Aerobic.mesophile RM eRM
## t = -8.3527, df = 2.0032, p-value = 0.01397
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
## -6.533301 -2.095297
## sample estimates:
## mean of x mean of y
## 4.886033 9.200332
## --------------------------------------------------------------------------------
## Streptococci
##
## F test to compare two variances
##
## data: Streptococci RM eRM
## F = 19.668, num df = 2, denom df = 2, p-value = 0.09677
## alternative hypothesis: true ratio of variances is not equal to 1
## 95 percent confidence interval:
## 0.5042989 767.0386708
## sample estimates:
## ratio of variances
## 19.66766
## ------------------------------------------------------------------
##
## Two Sample t-test
##
## data: Streptococci RM eRM
## t = -8.9479, df = 4, p-value = 0.0008629
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
## -6.298119 -3.315205
## sample estimates:
## mean of x mean of y
## 4.297291 9.103953
## --------------------------------------------------------------------------------
## Lactobacilli
##
## F test to compare two variances
##
## data: Lactobacilli RM eRM
## F = 7.3998, num df = 2, denom df = 2, p-value = 0.2381
## alternative hypothesis: true ratio of variances is not equal to 1
## 95 percent confidence interval:
## 0.189739 288.593026
## sample estimates:
## ratio of variances
## 7.399821
## ------------------------------------------------------------------
##
## Two Sample t-test
##
## data: Lactobacilli RM eRM
## t = -15.478, df = 4, p-value = 0.0001017
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
## -7.079545 -4.926023
## sample estimates:
## mean of x mean of y
## 3.138678 9.141463
## --------------------------------------------------------------------------------
## FH.lactobacilli
##
## F test to compare two variances
##
## data: FH.lactobacilli RM eRM
## F = 0.066598, num df = 2, denom df = 2, p-value = 0.1249
## alternative hypothesis: true ratio of variances is not equal to 1
## 95 percent confidence interval:
## 0.001707631 2.597307275
## sample estimates:
## ratio of variances
## 0.06659762
## ------------------------------------------------------------------
##
## Two Sample t-test
##
## data: FH.lactobacilli RM eRM
## t = -7.4693, df = 4, p-value = 0.001717
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
## -3.520483 -1.612494
## sample estimates:
## mean of x mean of y
## 1.012043 3.578532
## --------------------------------------------------------------------------------
## Enterococci
##
## F test to compare two variances
##
## data: Enterococci RM eRM
## F = 0.066201, num df = 2, denom df = 2, p-value = 0.1242
## alternative hypothesis: true ratio of variances is not equal to 1
## 95 percent confidence interval:
## 0.001697462 2.581839190
## sample estimates:
## ratio of variances
## 0.066201
## ------------------------------------------------------------------
##
## Two Sample t-test
##
## data: Enterococci RM eRM
## t = -5.6935, df = 4, p-value = 0.004701
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
## -6.732632 -2.318703
## sample estimates:
## mean of x mean of y
## 2.308226 6.833894
## --------------------------------------------------------------------------------
## Yeasts.and.molds
##
## F test to compare two variances
##
## data: Yeasts.and.molds RM eRM
## F = 0.36077, num df = 2, denom df = 2, p-value = 0.5302
## alternative hypothesis: true ratio of variances is not equal to 1
## 95 percent confidence interval:
## 0.00925047 14.06996547
## sample estimates:
## ratio of variances
## 0.3607683
## ------------------------------------------------------------------
##
## Two Sample t-test
##
## data: Yeasts.and.molds RM eRM
## t = -12.302, df = 4, p-value = 0.0002508
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
## -4.250444 -2.685144
## sample estimates:
## mean of x mean of y
## 1.135165 4.602959
## --------------------------------------------------------------------------------
## Staphylococci
##
## F test to compare two variances
##
## data: Staphylococci RM eRM
## F = 0.32397, num df = 2, denom df = 2, p-value = 0.4894
## alternative hypothesis: true ratio of variances is not equal to 1
## 95 percent confidence interval:
## 0.008306854 12.634724784
## sample estimates:
## ratio of variances
## 0.3239673
## ------------------------------------------------------------------
##
## Two Sample t-test
##
## data: Staphylococci RM eRM
## t = -7.4707, df = 4, p-value = 0.001716
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
## -3.483523 -1.595822
## sample estimates:
## mean of x mean of y
## 4.251834 6.791506
## --------------------------------------------------------------------------------
## Enterobacteriaceae
##
## F test to compare two variances
##
## data: Enterobacteriaceae RM eRM
## F = 0.17114, num df = 2, denom df = 2, p-value = 0.2923
## alternative hypothesis: true ratio of variances is not equal to 1
## 95 percent confidence interval:
## 0.004388145 6.674369066
## sample estimates:
## ratio of variances
## 0.1711377
## ------------------------------------------------------------------
##
## Two Sample t-test
##
## data: Enterobacteriaceae RM eRM
## t = -9.1823, df = 4, p-value = 0.0007812
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
## -6.185142 -3.313153
## sample estimates:
## mean of x mean of y
## 1.715616 6.464764
Statistical analysis was performed in order to define whether the evaluated differences were significant. If the variances of the two groups resulted homogeneous to the Fisher’s test, a Two Sample t-test was applied; alternatively a Welch t-test was used.
sample_names <- c("RM.H", "eRM.H")
subdf3 <- filter(df2, sample %in% sample_names)
m_vars = unique(subdf3$variable)
for (x in m_vars) {
test_two_grps(x, subdf3, sample_names)
}
## --------------------------------------------------------------------------------
## Aerobic.mesophile
##
## F test to compare two variances
##
## data: Aerobic.mesophile RM.H eRM.H
## F = 0.31509, num df = 2, denom df = 2, p-value = 0.4792
## alternative hypothesis: true ratio of variances is not equal to 1
## 95 percent confidence interval:
## 0.008079148 12.288383389
## sample estimates:
## ratio of variances
## 0.3150868
## ------------------------------------------------------------------
##
## Two Sample t-test
##
## data: Aerobic.mesophile RM.H eRM.H
## t = -20.94, df = 4, p-value = 3.074e-05
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
## -6.49363 -4.97324
## sample estimates:
## mean of x mean of y
## 2.147064 7.880499
## --------------------------------------------------------------------------------
## Streptococci
##
## F test to compare two variances
##
## data: Streptococci RM.H eRM.H
## F = 0.14824, num df = 2, denom df = 2, p-value = 0.2582
## alternative hypothesis: true ratio of variances is not equal to 1
## 95 percent confidence interval:
## 0.00380113 5.78151875
## sample estimates:
## ratio of variances
## 0.1482441
## ------------------------------------------------------------------
##
## Two Sample t-test
##
## data: Streptococci RM.H eRM.H
## t = -16.409, df = 4, p-value = 8.075e-05
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
## -7.294157 -5.183017
## sample estimates:
## mean of x mean of y
## 1.441008 7.679595
## --------------------------------------------------------------------------------
## Lactobacilli
##
## F test to compare two variances
##
## data: Lactobacilli RM.H eRM.H
## F = 0.074013, num df = 2, denom df = 2, p-value = 0.1378
## alternative hypothesis: true ratio of variances is not equal to 1
## 95 percent confidence interval:
## 0.001897766 2.886501454
## sample estimates:
## ratio of variances
## 0.07401286
## ------------------------------------------------------------------
##
## Two Sample t-test
##
## data: Lactobacilli RM.H eRM.H
## t = -9.3259, df = 4, p-value = 0.0007359
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
## -8.768099 -4.745050
## sample estimates:
## mean of x mean of y
## 0.8063764 7.5629511
## --------------------------------------------------------------------------------
## FH.lactobacilli
##
## F test to compare two variances
##
## data: FH.lactobacilli RM.H eRM.H
## F = 0, num df = 2, denom df = 2, p-value < 2.2e-16
## alternative hypothesis: true ratio of variances is not equal to 1
## 95 percent confidence interval:
## 0 0
## sample estimates:
## ratio of variances
## 0
## ------------------------------------------------------------------
##
## Welch Two Sample t-test
##
## data: FH.lactobacilli RM.H eRM.H
## t = -7.5974, df = 2, p-value = 0.01689
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
## -4.777612 -1.322770
## sample estimates:
## mean of x mean of y
## 0.000000 3.050191
## --------------------------------------------------------------------------------
## Enterococci
##
## F test to compare two variances
##
## data: Enterococci RM.H eRM.H
## F = 0.2267, num df = 2, denom df = 2, p-value = 0.3696
## alternative hypothesis: true ratio of variances is not equal to 1
## 95 percent confidence interval:
## 0.005812843 8.841334385
## sample estimates:
## ratio of variances
## 0.2267009
## ------------------------------------------------------------------
##
## Two Sample t-test
##
## data: Enterococci RM.H eRM.H
## t = -14.306, df = 4, p-value = 0.0001387
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
## -7.421818 -5.009321
## sample estimates:
## mean of x mean of y
## 0.7391613 6.9547307
## --------------------------------------------------------------------------------
## Yeasts.and.molds
##
## F test to compare two variances
##
## data: Yeasts.and.molds RM.H eRM.H
## F = 0, num df = 2, denom df = 2, p-value < 2.2e-16
## alternative hypothesis: true ratio of variances is not equal to 1
## 95 percent confidence interval:
## 0 0
## sample estimates:
## ratio of variances
## 0
## ------------------------------------------------------------------
##
## Welch Two Sample t-test
##
## data: Yeasts.and.molds RM.H eRM.H
## t = -4.6892, df = 2, p-value = 0.04259
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
## -7.2665373 -0.3123594
## sample estimates:
## mean of x mean of y
## 0.000000 3.789448
## --------------------------------------------------------------------------------
## Staphylococci
##
## F test to compare two variances
##
## data: Staphylococci RM.H eRM.H
## F = 0.0010706, num df = 2, denom df = 2, p-value = 0.002139
## alternative hypothesis: true ratio of variances is not equal to 1
## 95 percent confidence interval:
## 2.745184e-05 4.175425e-02
## sample estimates:
## ratio of variances
## 0.001070622
## ------------------------------------------------------------------
##
## Welch Two Sample t-test
##
## data: Staphylococci RM.H eRM.H
## t = -9.8263, df = 2.0043, p-value = 0.01013
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
## -8.054171 -3.155723
## sample estimates:
## mean of x mean of y
## 1.006372 6.611319
## --------------------------------------------------------------------------------
## Enterobacteriaceae
##
## F test to compare two variances
##
## data: Enterobacteriaceae RM.H eRM.H
## F = 0.078156, num df = 2, denom df = 2, p-value = 0.145
## alternative hypothesis: true ratio of variances is not equal to 1
## 95 percent confidence interval:
## 0.002003998 3.048080460
## sample estimates:
## ratio of variances
## 0.07815591
## ------------------------------------------------------------------
##
## Two Sample t-test
##
## data: Enterobacteriaceae RM.H eRM.H
## t = -11.127, df = 4, p-value = 0.0003712
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
## -8.125806 -4.880450
## sample estimates:
## mean of x mean of y
## 0.2816993 6.7848271
sample_names <- c("RM.H", "eRM.HS")
subdf4 <- filter(df2, sample %in% sample_names)
m_vars = unique(subdf4$variable)
for (x in m_vars) {
test_two_grps(x, subdf4, sample_names)
}
## --------------------------------------------------------------------------------
## Aerobic.mesophile
##
## F test to compare two variances
##
## data: Aerobic.mesophile RM.H eRM.HS
## F = 0.38303, num df = 2, denom df = 2, p-value = 0.5539
## alternative hypothesis: true ratio of variances is not equal to 1
## 95 percent confidence interval:
## 0.009821308 14.938209596
## sample estimates:
## ratio of variances
## 0.383031
## ------------------------------------------------------------------
##
## Two Sample t-test
##
## data: Aerobic.mesophile RM.H eRM.HS
## t = -17.51, df = 4, p-value = 6.246e-05
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
## -5.166273 -3.752135
## sample estimates:
## mean of x mean of y
## 2.147064 6.606268
## --------------------------------------------------------------------------------
## Streptococci
##
## F test to compare two variances
##
## data: Streptococci RM.H eRM.HS
## F = 0.039259, num df = 2, denom df = 2, p-value = 0.07555
## alternative hypothesis: true ratio of variances is not equal to 1
## 95 percent confidence interval:
## 0.001006646 1.531108560
## sample estimates:
## ratio of variances
## 0.03925919
## ------------------------------------------------------------------
##
## Two Sample t-test
##
## data: Streptococci RM.H eRM.HS
## t = -6.6938, df = 4, p-value = 0.002591
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
## -6.656113 -2.753282
## sample estimates:
## mean of x mean of y
## 1.441008 6.145705
## --------------------------------------------------------------------------------
## Lactobacilli
##
## F test to compare two variances
##
## data: Lactobacilli RM.H eRM.HS
## F = 0.18764, num df = 2, denom df = 2, p-value = 0.316
## alternative hypothesis: true ratio of variances is not equal to 1
## 95 percent confidence interval:
## 0.004811186 7.317814009
## sample estimates:
## ratio of variances
## 0.1876363
## ------------------------------------------------------------------
##
## Two Sample t-test
##
## data: Lactobacilli RM.H eRM.HS
## t = -7.8196, df = 4, p-value = 0.001444
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
## -5.070072 -2.413097
## sample estimates:
## mean of x mean of y
## 0.8063764 4.5479608
## --------------------------------------------------------------------------------
## FH.lactobacilli
##
## F test to compare two variances
##
## data: FH.lactobacilli RM.H eRM.HS
## F = 0, num df = 2, denom df = 2, p-value < 2.2e-16
## alternative hypothesis: true ratio of variances is not equal to 1
## 95 percent confidence interval:
## 0 0
## sample estimates:
## ratio of variances
## 0
## ------------------------------------------------------------------
##
## Welch Two Sample t-test
##
## data: FH.lactobacilli RM.H eRM.HS
## t = -1, df = 2, p-value = 0.4226
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
## -1.874837 1.167706
## sample estimates:
## mean of x mean of y
## 0.0000000 0.3535659
## --------------------------------------------------------------------------------
## Enterococci
##
## F test to compare two variances
##
## data: Enterococci RM.H eRM.HS
## F = 0.24012, num df = 2, denom df = 2, p-value = 0.3873
## alternative hypothesis: true ratio of variances is not equal to 1
## 95 percent confidence interval:
## 0.006156912 9.364662884
## sample estimates:
## ratio of variances
## 0.2401196
## ------------------------------------------------------------------
##
## Two Sample t-test
##
## data: Enterococci RM.H eRM.HS
## t = -9.2414, df = 4, p-value = 0.0007622
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
## -5.100919 -2.744015
## sample estimates:
## mean of x mean of y
## 0.7391613 4.6616279
## --------------------------------------------------------------------------------
## Yeasts.and.molds
##
## F test to compare two variances
##
## data: Yeasts.and.molds RM.H eRM.HS
## F = 0, num df = 2, denom df = 2, p-value < 2.2e-16
## alternative hypothesis: true ratio of variances is not equal to 1
## 95 percent confidence interval:
## 0 0
## sample estimates:
## ratio of variances
## 0
## ------------------------------------------------------------------
##
## Welch Two Sample t-test
##
## data: Yeasts.and.molds RM.H eRM.HS
## t = -1.5697, df = 2, p-value = 0.257
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
## -5.027149 2.339544
## sample estimates:
## mean of x mean of y
## 0.000000 1.343803
## --------------------------------------------------------------------------------
## Staphylococci
##
## F test to compare two variances
##
## data: Staphylococci RM.H eRM.HS
## F = 0.00079951, num df = 2, denom df = 2, p-value = 0.001598
## alternative hypothesis: true ratio of variances is not equal to 1
## 95 percent confidence interval:
## 2.050016e-05 3.118074e-02
## sample estimates:
## ratio of variances
## 0.0007995061
## ------------------------------------------------------------------
##
## Welch Two Sample t-test
##
## data: Staphylococci RM.H eRM.HS
## t = -7.6195, df = 2.0032, p-value = 0.01672
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
## -7.864048 -2.193415
## sample estimates:
## mean of x mean of y
## 1.006372 6.035103
## --------------------------------------------------------------------------------
## Enterobacteriaceae
##
## F test to compare two variances
##
## data: Enterobacteriaceae RM.H eRM.HS
## F = 0.4085, num df = 2, denom df = 2, p-value = 0.58
## alternative hypothesis: true ratio of variances is not equal to 1
## 95 percent confidence interval:
## 0.01047432 15.93144659
## sample estimates:
## ratio of variances
## 0.4084986
## ------------------------------------------------------------------
##
## Two Sample t-test
##
## data: Enterobacteriaceae RM.H eRM.HS
## t = -11.698, df = 4, p-value = 0.0003054
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
## -4.229203 -2.606700
## sample estimates:
## mean of x mean of y
## 0.2816993 3.6996508
Here is shown the microbial enrichment of RM.H, also with addition of salt.
Since here there are more then 2 groups, a one-way ANOVA model was needed for the statistical analysis. The variance homogeneity was previously checked using the Bartlett’s test. In case the assumptions of one-way ANOVA test are not met, a Kruskal-Wallis test is applied. A Multiple Mean Comparison test was eventually performed afterwards.
subdf5 <- filter(df2, sample %in% c("RM.H", "eRM.H", "eRM.HS"))
m_vars = unique(subdf5$variable)
for (x in m_vars) {
test_more_grps(x, subdf5)
}
## --------------------------------------------------------------------------------
## Aerobic.mesophile
##
## Bartlett test of homogeneity of variances
##
## data: Aerobic.mesophile
## Bartlett's K-squared = 0.54578, df = 2, p-value = 0.7612
## ------------------------------------------------------------------
## Df Sum Sq Mean Sq F value Pr(>F)
## sample 2 54.38 27.190 223.1 2.34e-06 ***
## Residuals 6 0.73 0.122
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## ------------------------------------------------------------------
## Tukey multiple comparisons of means
## 95% family-wise confidence level
##
## Fit: aov(formula = CFU_mL ~ sample)
##
## $sample
## diff lwr upr p adj
## eRM.HS-eRM.H -1.274231 -2.148773 -0.3996888 0.0100609
## RM.H-eRM.H -5.733435 -6.607977 -4.8588929 0.0000023
## RM.H-eRM.HS -4.459204 -5.333746 -3.5846623 0.0000102
## --------------------------------------------------------------------------------
## Streptococci
##
## Bartlett test of homogeneity of variances
##
## data: Streptococci
## Bartlett's K-squared = 3.3822, df = 2, p-value = 0.1843
## ------------------------------------------------------------------
## Df Sum Sq Mean Sq F value Pr(>F)
## sample 2 63.41 31.70 51.15 0.00017 ***
## Residuals 6 3.72 0.62
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## ------------------------------------------------------------------
## Tukey multiple comparisons of means
## 95% family-wise confidence level
##
## Fit: aov(formula = CFU_mL ~ sample)
##
## $sample
## diff lwr upr p adj
## eRM.HS-eRM.H -1.533890 -3.506316 0.4385357 0.1180403
## RM.H-eRM.H -6.238587 -8.211013 -4.2661615 0.0001692
## RM.H-eRM.HS -4.704697 -6.677123 -2.7322715 0.0008120
## --------------------------------------------------------------------------------
## Lactobacilli
##
## Bartlett test of homogeneity of variances
##
## data: Lactobacilli
## Bartlett's K-squared = 2.2755, df = 2, p-value = 0.3205
## ------------------------------------------------------------------
## Df Sum Sq Mean Sq F value Pr(>F)
## sample 2 68.74 34.37 47.89 0.000205 ***
## Residuals 6 4.31 0.72
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## ------------------------------------------------------------------
## Tukey multiple comparisons of means
## 95% family-wise confidence level
##
## Fit: aov(formula = CFU_mL ~ sample)
##
## $sample
## diff lwr upr p adj
## eRM.HS-eRM.H -3.014990 -5.137313 -0.8926677 0.0113229
## RM.H-eRM.H -6.756575 -8.878897 -4.6342520 0.0001630
## RM.H-eRM.HS -3.741584 -5.863907 -1.6192617 0.0039732
## --------------------------------------------------------------------------------
## FH.lactobacilli
##
## Bartlett test of homogeneity of variances
##
## data: FH.lactobacilli
## Bartlett's K-squared = Inf, df = 2, p-value < 2.2e-16
## ------------------------------------------------------------------
##
## Kruskal-Wallis rank sum test
##
## data: FH.lactobacilli
## Kruskal-Wallis chi-squared = 6.72, df = 2, p-value = 0.03474
## ------------------------------------------------------------------
## Kruskal-Wallis rank sum test
##
## data: x and g
## Kruskal-Wallis chi-squared = 6.72, df = 2, p-value = 0.03
##
##
## Comparison of x by g
## (Bonferroni)
## Col Mean-|
## Row Mean | eRM.H eRM.HS
## ---------+----------------------
## eRM.HS | 1.959591
## | 0.1501
## |
## RM.H | 2.449489 0.489897
## | 0.0429* 1.0000
##
## alpha = 0.05
## Reject Ho if p <= alpha
## --------------------------------------------------------------------------------
## Enterococci
##
## Bartlett test of homogeneity of variances
##
## data: Enterococci
## Bartlett's K-squared = 0.93453, df = 2, p-value = 0.6267
## ------------------------------------------------------------------
## Df Sum Sq Mean Sq F value Pr(>F)
## sample 2 59.28 29.639 88.73 3.5e-05 ***
## Residuals 6 2.00 0.334
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## ------------------------------------------------------------------
## Tukey multiple comparisons of means
## 95% family-wise confidence level
##
## Fit: aov(formula = CFU_mL ~ sample)
##
## $sample
## diff lwr upr p adj
## eRM.HS-eRM.H -2.293103 -3.741002 -0.8452031 0.0067553
## RM.H-eRM.H -6.215569 -7.663469 -4.7676698 0.0000291
## RM.H-eRM.HS -3.922467 -5.370366 -2.4745670 0.0004034
## --------------------------------------------------------------------------------
## Yeasts.and.molds
##
## Bartlett test of homogeneity of variances
##
## data: Yeasts.and.molds
## Bartlett's K-squared = Inf, df = 2, p-value < 2.2e-16
## ------------------------------------------------------------------
##
## Kruskal-Wallis rank sum test
##
## data: Yeasts.and.molds
## Kruskal-Wallis chi-squared = 5.8424, df = 2, p-value = 0.05387
## --------------------------------------------------------------------------------
## Staphylococci
##
## Bartlett test of homogeneity of variances
##
## data: Staphylococci
## Bartlett's K-squared = 9.4948, df = 2, p-value = 0.008674
## ------------------------------------------------------------------
##
## Kruskal-Wallis rank sum test
##
## data: Staphylococci
## Kruskal-Wallis chi-squared = 5.4222, df = 2, p-value = 0.06646
## --------------------------------------------------------------------------------
## Enterobacteriaceae
##
## Bartlett test of homogeneity of variances
##
## data: Enterobacteriaceae
## Bartlett's K-squared = 2.6555, df = 2, p-value = 0.2651
## ------------------------------------------------------------------
## Df Sum Sq Mean Sq F value Pr(>F)
## sample 2 63.49 31.75 78.93 4.91e-05 ***
## Residuals 6 2.41 0.40
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## ------------------------------------------------------------------
## Tukey multiple comparisons of means
## 95% family-wise confidence level
##
## Fit: aov(formula = CFU_mL ~ sample)
##
## $sample
## diff lwr upr p adj
## eRM.HS-eRM.H -3.085176 -4.673958 -1.496395 0.0024235
## RM.H-eRM.H -6.503128 -8.091909 -4.914346 0.0000384
## RM.H-eRM.HS -3.417951 -5.006733 -1.829170 0.0014148
This boxplot shows the microbial composition of the final cultures, obtained by incubating the mixture of NWC and the enriched milks.
Since here there are more then 2 groups, a one-way ANOVA model was needed for the statistical analysis. The variance homogeneity was previously checked using the Bartlett’s test. In case the assumptions of one-way ANOVA test are not met, a Kruskal-Wallis test is applied. A Multiple Mean Comparison test was eventually performed afterwards.
selected_samples <- c("NWC", "eRWC.y", "eRWC.o", "eRWC.H.y", "eRWC.H.o", "eRWC.HS.y", "eRWC.HS.o")
subdf6 <- filter(df2, sample %in% selected_samples)
m_vars = unique(subdf6$variable)
for (x in m_vars) {
test_more_grps(x, subdf6)
}
## --------------------------------------------------------------------------------
## Aerobic.mesophile
##
## Bartlett test of homogeneity of variances
##
## data: Aerobic.mesophile
## Bartlett's K-squared = 0.66898, df = 6, p-value = 0.9951
## ------------------------------------------------------------------
## Df Sum Sq Mean Sq F value Pr(>F)
## sample 6 6.232 1.0386 1.615 0.215
## Residuals 14 9.005 0.6432
## --------------------------------------------------------------------------------
## Streptococci
##
## Bartlett test of homogeneity of variances
##
## data: Streptococci
## Bartlett's K-squared = 5.1667, df = 6, p-value = 0.5226
## ------------------------------------------------------------------
## Df Sum Sq Mean Sq F value Pr(>F)
## sample 6 3.93 0.6550 0.797 0.588
## Residuals 14 11.50 0.8217
## --------------------------------------------------------------------------------
## Lactobacilli
##
## Bartlett test of homogeneity of variances
##
## data: Lactobacilli
## Bartlett's K-squared = 5.5485, df = 6, p-value = 0.4756
## ------------------------------------------------------------------
## Df Sum Sq Mean Sq F value Pr(>F)
## sample 6 1.978 0.3296 0.251 0.951
## Residuals 14 18.379 1.3128
## --------------------------------------------------------------------------------
## FH.lactobacilli
##
## Bartlett test of homogeneity of variances
##
## data: FH.lactobacilli
## Bartlett's K-squared = 4.4453, df = 6, p-value = 0.6166
## ------------------------------------------------------------------
## Df Sum Sq Mean Sq F value Pr(>F)
## sample 6 4.457 0.7429 0.677 0.671
## Residuals 14 15.372 1.0980
## --------------------------------------------------------------------------------
## Enterococci
##
## Bartlett test of homogeneity of variances
##
## data: Enterococci
## Bartlett's K-squared = 8.6924, df = 6, p-value = 0.1916
## ------------------------------------------------------------------
## Df Sum Sq Mean Sq F value Pr(>F)
## sample 6 33.15 5.525 4.21 0.0125 *
## Residuals 14 18.37 1.312
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## ------------------------------------------------------------------
## Tukey multiple comparisons of means
## 95% family-wise confidence level
##
## Fit: aov(formula = CFU_mL ~ sample)
##
## $sample
## diff lwr upr p adj
## eRWC.H.y-eRWC.H.o -0.28049937 -3.4745060 2.9135072 0.9999160
## eRWC.HS.o-eRWC.H.o -2.04331342 -5.2373200 1.1506932 0.3609038
## eRWC.HS.y-eRWC.H.o -2.85237603 -6.0463826 0.3416306 0.0946776
## eRWC.o-eRWC.H.o -0.65004973 -3.8440564 2.5439569 0.9907539
## eRWC.y-eRWC.H.o -0.36719217 -3.5611988 2.8268145 0.9996009
## NWC-eRWC.H.o -3.32247364 -6.5164803 -0.1284670 0.0391093
## eRWC.HS.o-eRWC.H.y -1.76281405 -4.9568207 1.4311926 0.5195760
## eRWC.HS.y-eRWC.H.y -2.57187665 -5.7658833 0.6221300 0.1560139
## eRWC.o-eRWC.H.y -0.36955036 -3.5635570 2.8244563 0.9995860
## eRWC.y-eRWC.H.y -0.08669279 -3.2806994 3.1073138 0.9999999
## NWC-eRWC.H.y -3.04197427 -6.2359809 0.1520323 0.0666390
## eRWC.HS.y-eRWC.HS.o -0.80906261 -4.0030692 2.3849440 0.9723376
## eRWC.o-eRWC.HS.o 1.39326369 -1.8007429 4.5872703 0.7464837
## eRWC.y-eRWC.HS.o 1.67612125 -1.5178854 4.8701279 0.5730612
## NWC-eRWC.HS.o -1.27916022 -4.4731668 1.9148464 0.8095251
## eRWC.o-eRWC.HS.y 2.20232629 -0.9916803 5.3963329 0.2854987
## eRWC.y-eRWC.HS.y 2.48518386 -0.7088228 5.6791905 0.1809168
## NWC-eRWC.HS.y -0.47009762 -3.6641042 2.7239090 0.9983891
## eRWC.y-eRWC.o 0.28285757 -2.9111491 3.4768642 0.9999118
## NWC-eRWC.o -2.67242391 -5.8664305 0.5215827 0.1308671
## NWC-eRWC.y -2.95528148 -6.1492881 0.2387251 0.0783336
## --------------------------------------------------------------------------------
## Yeasts.and.molds
##
## Bartlett test of homogeneity of variances
##
## data: Yeasts.and.molds
## Bartlett's K-squared = 7.1439, df = 6, p-value = 0.3077
## ------------------------------------------------------------------
## Df Sum Sq Mean Sq F value Pr(>F)
## sample 6 48.08 8.013 3.237 0.0329 *
## Residuals 14 34.66 2.476
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## ------------------------------------------------------------------
## Tukey multiple comparisons of means
## 95% family-wise confidence level
##
## Fit: aov(formula = CFU_mL ~ sample)
##
## $sample
## diff lwr upr p adj
## eRWC.H.y-eRWC.H.o -1.264466193 -5.651305 3.122373 0.9494909
## eRWC.HS.o-eRWC.H.o -1.442574233 -5.829413 2.944265 0.9105900
## eRWC.HS.y-eRWC.H.o -3.117058388 -7.503898 1.269781 0.2572630
## eRWC.o-eRWC.H.o -0.007373058 -4.394212 4.379466 1.0000000
## eRWC.y-eRWC.H.o -1.559289033 -5.946128 2.827550 0.8776525
## NWC-eRWC.H.o -4.526231162 -8.913070 -0.139392 0.0411874
## eRWC.HS.o-eRWC.H.y -0.178108041 -4.564947 4.208731 0.9999991
## eRWC.HS.y-eRWC.H.y -1.852592195 -6.239431 2.534247 0.7717983
## eRWC.o-eRWC.H.y 1.257093135 -3.129746 5.643932 0.9508071
## eRWC.y-eRWC.H.y -0.294822841 -4.681662 4.092016 0.9999825
## NWC-eRWC.H.y -3.261764969 -7.648604 1.125074 0.2172994
## eRWC.HS.y-eRWC.HS.o -1.674484155 -6.061323 2.712355 0.8396806
## eRWC.o-eRWC.HS.o 1.435201176 -2.951638 5.822040 0.9124744
## eRWC.y-eRWC.HS.o -0.116714800 -4.503554 4.270124 0.9999999
## NWC-eRWC.HS.o -3.083656929 -7.470496 1.303182 0.2672361
## eRWC.o-eRWC.HS.y 3.109685330 -1.277154 7.496524 0.2594401
## eRWC.y-eRWC.HS.y 1.557769355 -2.829070 5.944608 0.8781182
## NWC-eRWC.HS.y -1.409172774 -5.796012 2.977666 0.9189370
## eRWC.y-eRWC.o -1.551915976 -5.938755 2.834923 0.8799031
## NWC-eRWC.o -4.518858104 -8.905697 -0.132019 0.0416135
## NWC-eRWC.y -2.966942129 -7.353781 1.419897 0.3043144
## --------------------------------------------------------------------------------
## Staphylococci
##
## Bartlett test of homogeneity of variances
##
## data: Staphylococci
## Bartlett's K-squared = 6.3221, df = 6, p-value = 0.3881
## ------------------------------------------------------------------
## Df Sum Sq Mean Sq F value Pr(>F)
## sample 6 29.4 4.901 3.965 0.0158 *
## Residuals 14 17.3 1.236
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## ------------------------------------------------------------------
## Tukey multiple comparisons of means
## 95% family-wise confidence level
##
## Fit: aov(formula = CFU_mL ~ sample)
##
## $sample
## diff lwr upr p adj
## eRWC.H.y-eRWC.H.o 2.2577275 -0.8416620 5.3571170 0.2349343
## eRWC.HS.o-eRWC.H.o -1.7374481 -4.8368376 1.3619414 0.5027973
## eRWC.HS.y-eRWC.H.o -0.3140636 -3.4134532 2.7853259 0.9998070
## eRWC.o-eRWC.H.o -0.1088761 -3.2082657 2.9905134 0.9999996
## eRWC.y-eRWC.H.o 0.7616601 -2.3377294 3.8610496 0.9761075
## NWC-eRWC.H.o -0.9986391 -4.0980286 2.1007504 0.9178924
## eRWC.HS.o-eRWC.H.y -3.9951756 -7.0945652 -0.8957861 0.0083527
## eRWC.HS.y-eRWC.H.y -2.5717911 -5.6711807 0.5275984 0.1360737
## eRWC.o-eRWC.H.y -2.3666037 -5.4659932 0.7327859 0.1955352
## eRWC.y-eRWC.H.y -1.4960674 -4.5954569 1.6033221 0.6569113
## NWC-eRWC.H.y -3.2563666 -6.3557561 -0.1569771 0.0366836
## eRWC.HS.y-eRWC.HS.o 1.4233845 -1.6760051 4.5227740 0.7028500
## eRWC.o-eRWC.HS.o 1.6285720 -1.4708176 4.7279615 0.5717119
## eRWC.y-eRWC.HS.o 2.4991082 -0.6002813 5.5984977 0.1550668
## NWC-eRWC.HS.o 0.7388090 -2.3605805 3.8381985 0.9794171
## eRWC.o-eRWC.HS.y 0.2051875 -2.8942020 3.3045770 0.9999840
## eRWC.y-eRWC.HS.y 1.0757237 -2.0236658 4.1751133 0.8886383
## NWC-eRWC.HS.y -0.6845755 -3.7839650 2.4148141 0.9859347
## eRWC.y-eRWC.o 0.8705362 -2.2288533 3.9699258 0.9550925
## NWC-eRWC.o -0.8897630 -3.9891525 2.2096266 0.9504049
## NWC-eRWC.y -1.7602992 -4.8596887 1.3390903 0.4886678
## --------------------------------------------------------------------------------
## Enterobacteriaceae
##
## Bartlett test of homogeneity of variances
##
## data: Enterobacteriaceae
## Bartlett's K-squared = 2.7228, df = 6, p-value = 0.8427
## ------------------------------------------------------------------
## Df Sum Sq Mean Sq F value Pr(>F)
## sample 6 50.14 8.357 3.368 0.0287 *
## Residuals 14 34.74 2.482
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## ------------------------------------------------------------------
## Tukey multiple comparisons of means
## 95% family-wise confidence level
##
## Fit: aov(formula = CFU_mL ~ sample)
##
## $sample
## diff lwr upr p adj
## eRWC.H.y-eRWC.H.o 1.21248499 -3.179522 5.604492081 0.9585151
## eRWC.HS.o-eRWC.H.o -1.58228721 -5.974294 2.809719879 0.8710773
## eRWC.HS.y-eRWC.H.o -2.06572471 -6.457732 2.326282379 0.6812679
## eRWC.o-eRWC.H.o 1.18780958 -3.204198 5.579816665 0.9622874
## eRWC.y-eRWC.H.o -0.46109591 -4.853103 3.930911180 0.9997632
## NWC-eRWC.H.o -3.21047179 -7.602479 1.181535292 0.2318928
## eRWC.HS.o-eRWC.H.y -2.79477220 -7.186779 1.597234885 0.3664833
## eRWC.HS.y-eRWC.H.y -3.27820970 -7.670217 1.113797384 0.2140684
## eRWC.o-eRWC.H.y -0.02467542 -4.416683 4.367331671 1.0000000
## eRWC.y-eRWC.H.y -1.67358090 -6.065588 2.718426186 0.8406891
## NWC-eRWC.H.y -4.42295679 -8.814964 -0.030949702 0.0479018
## eRWC.HS.y-eRWC.HS.o -0.48343750 -4.875445 3.908569587 0.9996890
## eRWC.o-eRWC.HS.o 2.77009679 -1.621910 7.162103873 0.3758113
## eRWC.y-eRWC.HS.o 1.12119130 -3.270816 5.513198389 0.9712887
## NWC-eRWC.HS.o -1.62818459 -6.020192 2.763822501 0.8562009
## eRWC.o-eRWC.HS.y 3.25353429 -1.138473 7.645541374 0.2204311
## eRWC.y-eRWC.HS.y 1.60462880 -2.787378 5.996635889 0.8639396
## NWC-eRWC.HS.y -1.14474709 -5.536754 3.247260001 0.9682985
## eRWC.y-eRWC.o -1.64890548 -6.040913 2.743101603 0.8492166
## NWC-eRWC.o -4.39828137 -8.790288 -0.006274285 0.0495677
## NWC-eRWC.y -2.74937589 -7.141383 1.642631199 0.3837524
This R script will analyze 16S results from ASV counts to graphical visualization mainly using phyloseq http://joey711.github.io/phyloseq/import-data.html
# import raw sequences that were already quality filtered as described in Dreier et al 2022
fp <- file.path(getwd(), "..", "data", "cultures", "ngs", "ASV_table.xlsx")
cult_physeq <- load_ngs_data(fp)
# Counts to relative abundance
# ASV counts transformed to relative abundance (%)
# Note: raw sequences were already quality filtered as described in Dreier et al 2022
# thus taxa are not further trimmed for ordination and diversity analysis
# since even low abundant species are informative
cult_physeqR = transform_sample_counts(cult_physeq, function(x) x/sum(x)*100 )
# font sizes for plots
yaxis_tick_label_size = 10
yaxis_label_size = 10
title_size = 14
subtitle_size = 14
xaxis_tick_label_size = 12
# Summarize diversity values in table
metadata <- as(sample_data(cult_physeq), "data.frame")
richness_table <- estimate_richness(cult_physeq)
richness_table["Sample"] <- metadata["sample_subtype"]
richness_table["replicate"] <- metadata["replicate"]
# Reorder columns
richness_table <- richness_table[, c(10, 11, 1, 2, 3, 4, 5, 6, 7, 8 , 9)]
richness_table
## Sample replicate Observed Chao1 se.chao1 ACE se.ACE
## RM.2 RM 1 187 187.3333 0.9257269 187.45666 5.5240883
## NWC.2 NWC 1 8 8.0000 0.0000000 8.00000 1.2247449
## eRM.2 eRM 1 33 33.0000 0.2461830 33.38948 2.2959662
## eRM.H.2 eRM.H 1 27 27.0000 0.0000000 27.00000 1.6329932
## eRM.HS.2 eRM.HS 1 63 63.0000 0.0000000 63.00000 3.7838420
## eRWC.y.2 eRWC.y 1 19 19.0000 0.0000000 19.00000 1.3377121
## eRWC.H.y.2 eRWC.H.y 1 27 27.0000 0.1635511 27.57143 1.6471079
## eRWC.HS.y.2 eRWC.HS.y 1 13 13.0000 0.0000000 13.00000 1.3008873
## eRWC.o.2 eRWC.o 1 27 27.0000 0.0000000 27.00000 2.0184336
## eRWC.H.o.2 eRWC.H.o 1 29 29.0000 0.4913037 29.32346 2.1534176
## eRWC.HS.o.2 eRWC.HS.o 1 19 19.0000 0.0000000 19.00000 1.3377121
## RM.3 RM 2 178 178.0000 0.0000000 178.00000 2.2044388
## NWC.3 NWC 2 12 12.0000 0.0000000 12.00000 0.9574271
## eRM.3 eRM 2 25 25.0000 0.4898979 25.22141 2.1781484
## eRM.H.3 eRM.H 2 34 34.0000 0.1641974 34.65021 1.7217992
## eRM.HS.3 eRM.HS 2 38 38.0000 0.0000000 38.00000 2.7956733
## eRWC.y.3 eRWC.y 2 30 30.0000 0.2457980 30.66233 2.2014708
## eRWC.H.y.3 eRWC.H.y 2 24 25.0000 2.3108440 24.96505 2.2482432
## eRWC.HS.y.3 eRWC.HS.y 2 22 22.0000 0.1221261 22.71096 2.2389837
## eRWC.o.3 eRWC.o 2 31 31.0000 0.0000000 31.00000 1.6461098
## eRWC.H.o.3 eRWC.H.o 2 21 21.0000 0.0000000 21.00000 1.7994708
## eRWC.HS.o.3 eRWC.HS.o 2 25 25.0000 0.0000000 25.00000 1.3564660
## RM.4 RM 3 125 125.0000 0.4979960 125.18280 4.3009922
## NWC.4 NWC 3 9 9.0000 0.0000000 9.00000 0.9428090
## eRM.4 eRM 3 31 31.0000 0.4918694 31.51953 1.6129265
## eRM.H.4 eRM.H 3 48 48.0000 0.0000000 48.00000 0.9895285
## eRM.HS.4 eRM.HS 3 40 40.0000 0.4937104 40.25598 2.1854976
## eRWC.y.4 eRWC.y 3 30 30.0000 0.0000000 30.00000 2.5099801
## eRWC.H.y.4 eRWC.H.y 3 26 26.0000 0.0000000 26.00000 2.4806947
## eRWC.HS.y.4 eRWC.HS.y 3 18 19.0000 2.2998856 18.74896 2.0054355
## eRWC.o.4 eRWC.o 3 28 28.0000 0.0000000 28.00000 1.8516402
## eRWC.H.o.4 eRWC.H.o 3 25 25.0000 0.2449490 25.51689 2.1424747
## eRWC.HS.o.4 eRWC.HS.o 3 23 23.0000 0.0000000 23.00000 1.8177865
## Shannon Simpson InvSimpson Fisher
## RM.2 3.6340335 0.90893777 10.981501 28.4912451
## NWC.2 0.2132335 0.08356654 1.091187 0.6995682
## eRM.2 1.7778360 0.75780040 4.128826 3.6674951
## eRM.H.2 2.1073510 0.82641140 5.760747 2.9650366
## eRM.HS.2 0.8408825 0.32036464 1.471377 6.3968941
## eRWC.y.2 0.6092499 0.23636872 1.309533 1.8483752
## eRWC.H.y.2 0.7112148 0.28082089 1.390474 2.6807118
## eRWC.HS.y.2 0.3231377 0.12569873 1.143770 1.0803211
## eRWC.o.2 1.3189694 0.59890631 2.493183 2.5140674
## eRWC.H.o.2 0.9781383 0.40779791 1.688613 2.7313235
## eRWC.HS.o.2 1.1188118 0.58937175 2.435293 1.7195216
## RM.3 3.4430962 0.89258676 9.309839 21.1232946
## NWC.3 0.3346318 0.13629215 1.157799 1.0285560
## eRM.3 0.4885485 0.17391688 1.210532 2.5633656
## eRM.H.3 2.2477054 0.82588566 5.743352 3.3467815
## eRM.HS.3 1.8897852 0.81178899 5.313185 3.6397647
## eRWC.y.3 0.5983486 0.28911774 1.406703 2.7803261
## eRWC.H.y.3 0.4232009 0.17189234 1.207572 2.2030228
## eRWC.HS.y.3 0.5881647 0.26935073 1.368646 1.9181873
## eRWC.o.3 1.5189914 0.68263234 3.150920 2.8420564
## eRWC.H.o.3 1.1320277 0.47805487 1.915910 1.8527952
## eRWC.HS.o.3 1.5286237 0.71463170 3.504243 2.2253392
## RM.4 2.3087924 0.78350260 4.618993 13.9592564
## NWC.4 0.2276936 0.08177288 1.089055 0.7896196
## eRM.4 0.4034391 0.12115649 1.137859 3.0807798
## eRM.H.4 2.1397921 0.78753973 4.706762 4.6158258
## eRM.HS.4 0.8119889 0.30681802 1.442623 3.7345737
## eRWC.y.4 0.2836259 0.09028779 1.099249 2.9459226
## eRWC.H.y.4 0.3643399 0.12334999 1.140706 2.5463340
## eRWC.HS.y.4 0.3116122 0.11367379 1.128253 1.6641025
## eRWC.o.4 0.4497640 0.14913289 1.175272 2.7356668
## eRWC.H.o.4 0.4514010 0.15689793 1.186096 2.4200027
## eRWC.HS.o.4 0.5752884 0.23815353 1.312600 2.1386689
# write to excel
write.xlsx(richness_table, '../tables/cultures_richness_table.xlsx', colNames = TRUE, rowNames = TRUE, asTable = FALSE, keepNA = FALSE, na.string = "NaN")
# Beta diversity Permanova for all samples
adonis2(distance(cult_physeq, method="bray") ~ sample_subtype, data = metadata, permutations = 9999)
## Permutation test for adonis under reduced model
## Terms added sequentially (first to last)
## Permutation: free
## Number of permutations: 9999
##
## adonis2(formula = distance(cult_physeq, method = "bray") ~ sample_subtype, data = metadata, permutations = 9999)
## Df SumOfSqs R2 F Pr(>F)
## sample_subtype 10 5.1665 0.41167 1.5394 0.0176 *
## Residual 22 7.3837 0.58833
## Total 32 12.5502 1.00000
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# filter for cultures
cultures_only_phy <- subset_samples(cult_physeq, sample_type == "eRWC")
# cultures metadata
c_metadata <- as(sample_data(cultures_only_phy), "data.frame")
# Beta diversity Permanova
adonis2(distance(cultures_only_phy, method="bray") ~ sample_subtype, data = c_metadata, permutations = 9999)
## Permutation test for adonis under reduced model
## Terms added sequentially (first to last)
## Permutation: free
## Number of permutations: 9999
##
## adonis2(formula = distance(cultures_only_phy, method = "bray") ~ sample_subtype, data = c_metadata, permutations = 9999)
## Df SumOfSqs R2 F Pr(>F)
## sample_subtype 5 1.1375 0.23204 0.7252 0.7015
## Residual 12 3.7647 0.76796
## Total 17 4.9022 1.00000
# percentage thresholds used for filtering
abund_thresh <- 1.0
sp_res_th <- 0.6
# Taxa agglomeration----
# Differently to Ord. & Div. plots, in bar charts a taxa filter is necessary to show the most abundant species
# Let's agglomerate at the Species level using the tax_glom function from phyloseq
# It requires "Kingdom" to be the first column of our tax_table
# (see probl. solved https://github.com/joey711/phyloseq/issues/1551)
tax_table(cult_physeqR) <- tax_table(cult_physeqR)[,c(2:10,1)] # Re-order the columns
# Let's agglomerate at the Species level
# i.e. different ASV belonging to the same species will be agglomerated
# their relative abundance will be summed
# further the data frame is melted to long format for filtering and plotting
cult_physeqR_tax <- cult_physeqR %>%
tax_glom(taxrank = "Species") %>%
psmelt()
# Select only the natural whey and the enriched raw milk-whey culture samples
cult_physeqR_tax_samples = subset(cult_physeqR_tax, sample_type %in% c("eRWC", "NWC"))
# create a mapper to replace old lactobacilli names in table
fp = file.path(getwd(), "..", "data", "species_dict.json")
species_dict <- fromJSON(file = fp, simplify = TRUE)
# In order to get groups in the plots add a group category and rename replicates
cult_physeqR_tax_samples <- cult_physeqR_tax_samples %>%
mutate(sample_subtype = substr(sample_ID, 1, nchar(sample_ID) - 2)) %>%
mutate(replicate = replicate - 1)
# For plotting it looks nicer when the species names are separated by spaces
cult_physeqR_tax_samples <- cult_physeqR_tax_samples %>%
mutate(Species = str_replace(Species, "_", " ")) %>%
# update new lactobacilli names
mutate(Species = recode_factor(Species, !!!species_dict)) %>%
# change Species to sp. for unknown species names
mutate(Species = str_replace(Species, "Species", "sp."))
# group data frame by Species, calculate mean rel. abundance
means <- ddply(cult_physeqR_tax_samples, ~Species, function(x) c(mean = mean(x$Abundance)))
# get the species names with high and low average relative abundance, respectively
# Species whose rel. abundance is higher than 1% (or threshold [abund_thresh] defined above)
high_ab_sp <- means[means$mean >= abund_thresh, "Species"]
# Species whose rel. abundance is less than 1%
low_ab_sp <- means[means$mean < abund_thresh, "Species"]
# Create two data frames with high and low avg. abundances
cult_physeqR_tax_high <- cult_physeqR_tax_samples[cult_physeqR_tax_samples$Species %in% high_ab_sp,]
cult_physeqR_tax_low <- cult_physeqR_tax_samples[cult_physeqR_tax_samples$Species %in% low_ab_sp,]
low_abund <- cult_physeqR_tax_low
lowabund_str <- paste(c("<", abund_thresh, "% avg. abundance"), collapse = " ")
low_abund$Species <- lowabund_str
cult_physeqR_tax_high_full <- rbind(cult_physeqR_tax_high, low_abund)
# convert Species to a character vector from a factor because R
cult_physeqR_tax_low$Species <- as.character(cult_physeqR_tax_low$Species)
# group data frame by Species, calculate max rel. abundance
maxs <- ddply(cult_physeqR_tax_low, ~Species, function(x) c(max = max(x$Abundance)))
# find Species whose max. abundance is less than 0.6% [threshold "sp_res_th" defined above]
remainder <- maxs[maxs$max <= sp_res_th,]$Species
# change their name to "Other species"
cult_physeqR_tax_low[cult_physeqR_tax_low$Species %in% remainder,]$Species <- 'Other species'
# create nicely formatted guides with the species names
ordered_species_list <- ordered_species_guides(cult_physeqR_tax_low$Species, "Other species")
ordered_species <- ordered_species_list[[1]]
ordered_sp_labels <- ordered_species_list[[2]]
ha_ordered_species_list <- ordered_species_guides(cult_physeqR_tax_high$Species, lowabund_str)
ha_ordered_species <- ha_ordered_species_list[[1]]
ha_ordered_sp_labels <- ha_ordered_species_list[[2]]
# sample_ID levels
sampleid_vector <- c("NWC.2", "NWC.3", "NWC.4",
"eRWC.y.2", "eRWC.y.3", "eRWC.y.4",
"eRWC.o.2", "eRWC.o.3", "eRWC.o.4",
"eRWC.H.y.2", "eRWC.H.y.3", "eRWC.H.y.4",
"eRWC.H.o.2", "eRWC.H.o.3", "eRWC.H.o.4",
"eRWC.HS.y.2", "eRWC.HS.y.3", "eRWC.HS.y.4",
"eRWC.HS.o.2", "eRWC.HS.o.3", "eRWC.HS.o.4")
# Colors are set manually
species_colors <- c("#88CCEE", "#CC6677", "#DDCC77", "#117733", "#332288", "#AA4499",
"#44AA99", "#999933", "#882255", "#661100", "#6699CC", "#888888")
# Define font size for both plots here
yaxis_tick_label_size = 10
yaxis_label_size = 10
xaxis_tick_label_size = 12
Xaxis_rot = 0
df_sorter = c("Species", "sample_subtype")
x_labels = c(".1",".2",".3")
# High abundant species plot
p1 <- draw_ha_stacked_plot(df = cult_physeqR_tax_high_full,
sampleid_vector = sampleid_vector,
x_factor = "sample_ID",
x_labels = x_labels,
species_colors = species_colors,
df_sorter = df_sorter,
wrap_group = c("~sample_subtype"),
ordered_sp_labels = ha_ordered_sp_labels,
abund_thresh = abund_thresh,
sp_res_th = sp_res_th,
ordered_species = ha_ordered_species
)
# Low abundant species plot
p2 <- draw_la_stacked_plot(df = cult_physeqR_tax_low,
sampleid_vector = sampleid_vector,
x_labels = x_labels,
species_colors = species_colors,
ordered_sp_labels = ordered_sp_labels,
x_factor = "sample_ID",
ordered_species = ordered_species,
wrap_group = c("~sample_subtype"),
abund_thresh = abund_thresh,
sp_res_th = sp_res_th
)
This R script will analyze 16S results from ASV counts to graphical visualization mainly using phyloseq http://joey711.github.io/phyloseq/import-data.html
cheese_fp <- file.path(getwd(), "..", "data", "cheese", "ngs", "ASV_table.xlsx")
cheese_physeq = load_ngs_data(cheese_fp)
# remove thermized controls
cheese_physeq = subset_samples(cheese_physeq, sample_ID != "K302_DNA" & sample_ID != "K310_DNA")
# since we removed the thermized sample, there may be taxa with count = 0
# this will eventually remove them
cheese_physeq <- prune_taxa(taxa_sums(cheese_physeq) > 1, cheese_physeq)
# get relative abundance data
cheese_physeqR <- transform_sample_counts(cheese_physeq, function(x) x/sum(x)*100 )
# font sizes for plots
yaxis_tick_label_size = 10
yaxis_label_size = 10
title_size = 14
subtitle_size = 14
xaxis_tick_label_size = 12
# bray = "Bray-Curtis dissimilarity"
# NMDS = Non-metric Multidimensional Scaling
# import raw sequences that were already quality filtered as described in Dreier et al 2022
fp_culture <- file.path(getwd(), "..", "data", "cultures", "ngs", "ASV_table.xlsx")
cult_physeq <- load_ngs_data(fp_culture)
cult_physeqR = transform_sample_counts(cult_physeq, function(x) x/sum(x)*100 )
ordi = ordinate(cult_physeqR, method = "NMDS", distance = "bray")
## Square root transformation
## Wisconsin double standardization
## Run 0 stress 0.2108115
## Run 1 stress 0.2088326
## ... New best solution
## ... Procrustes: rmse 0.1329443 max resid 0.3750953
## Run 2 stress 0.2352694
## Run 3 stress 0.215075
## Run 4 stress 0.1980362
## ... New best solution
## ... Procrustes: rmse 0.04436881 max resid 0.2204983
## Run 5 stress 0.2480068
## Run 6 stress 0.2404204
## Run 7 stress 0.222634
## Run 8 stress 0.2270414
## Run 9 stress 0.2149473
## Run 10 stress 0.2272627
## Run 11 stress 0.2138737
## Run 12 stress 0.1990086
## Run 13 stress 0.2672146
## Run 14 stress 0.220576
## Run 15 stress 0.2263959
## Run 16 stress 0.1985512
## Run 17 stress 0.2297981
## Run 18 stress 0.1990155
## Run 19 stress 0.2144827
## Run 20 stress 0.2328488
## *** Best solution was not repeated -- monoMDS stopping criteria:
## 1: no. of iterations >= maxit
## 16: stress ratio > sratmax
## 3: scale factor of the gradient < sfgrmin
dnmds_plt <- plot_ordination(cult_physeqR, ordi, "sample_ID", color = "sample_type")
# replace Na with "None" in milk_treatment factor
NMDS_df <- dnmds_plt$data %>% replace_na(list(milk_treatment = "None"))
# plot with all the sample_types
cult_p_ord <- ggplot(NMDS_df,
aes(NMDS1, NMDS2,
color = milk_treatment,
shape = sample_type)) +
geom_point(size = 6) +
scale_color_manual(values = c("H" = "#117733",
"HS" = "#332288",
"None" = "#AA4499")) +
labs(title = "NMDS (Bray-Curtis), milk & cultures") +
guides(color = guide_legend(order = 1),
shape = guide_legend(order = 2)) +
theme(text = element_text(size = xaxis_tick_label_size),
plot.title = element_text(size = title_size, hjust = 0.5, )) +
geom_text(mapping = aes(label = culture_type), color = 001,
vjust = 0, hjust = 1.5, size = xaxis_tick_label_size/2)
# bray = "Bray-Curtis dissimilarity"
# NMDS = Non-metric MultiDimentional Scaling
ordi = ordinate(cheese_physeqR, method = "NMDS", distance = "bray")
## Square root transformation
## Wisconsin double standardization
## Run 0 stress 0.1336254
## Run 1 stress 0.1336254
## ... Procrustes: rmse 5.248895e-05 max resid 0.0001167245
## ... Similar to previous best
## Run 2 stress 0.1336254
## ... Procrustes: rmse 0.0001015451 max resid 0.0002381264
## ... Similar to previous best
## Run 3 stress 0.1336254
## ... New best solution
## ... Procrustes: rmse 4.117213e-06 max resid 8.702852e-06
## ... Similar to previous best
## Run 4 stress 0.1437675
## Run 5 stress 0.1799776
## Run 6 stress 0.1336254
## ... Procrustes: rmse 6.414714e-05 max resid 0.0001440501
## ... Similar to previous best
## Run 7 stress 0.1336254
## ... Procrustes: rmse 4.504085e-05 max resid 9.1842e-05
## ... Similar to previous best
## Run 8 stress 0.1336254
## ... Procrustes: rmse 3.813832e-05 max resid 8.119956e-05
## ... Similar to previous best
## Run 9 stress 0.1903222
## Run 10 stress 0.1336254
## ... Procrustes: rmse 0.0001145622 max resid 0.000254667
## ... Similar to previous best
## Run 11 stress 0.1336254
## ... Procrustes: rmse 2.327019e-05 max resid 5.289384e-05
## ... Similar to previous best
## Run 12 stress 0.1336254
## ... Procrustes: rmse 8.042166e-05 max resid 0.0001795095
## ... Similar to previous best
## Run 13 stress 0.1336254
## ... Procrustes: rmse 0.0001050267 max resid 0.0002551261
## ... Similar to previous best
## Run 14 stress 0.1741222
## Run 15 stress 0.1741986
## Run 16 stress 0.1437675
## Run 17 stress 0.2331284
## Run 18 stress 0.1336254
## ... Procrustes: rmse 2.773221e-05 max resid 5.694338e-05
## ... Similar to previous best
## Run 19 stress 0.1336254
## ... Procrustes: rmse 1.341918e-05 max resid 2.672063e-05
## ... Similar to previous best
## Run 20 stress 0.2119651
## *** Best solution repeated 10 times
dnmds_plt <- plot_ordination(cheese_physeqR, ordi, "sample_ID", color = "milk_treatment")
NMDS_df <- dnmds_plt$data
# plot with all the sample_types
ch_p_ord = ggplot(NMDS_df,
aes(NMDS1, NMDS2,
color = milk_treatment,
shape = culture_type)) +
geom_point(size = 6) +
scale_color_manual(values = c("H" = "#117733",
"HS" = "#332288",
"None" = "#AA4499",
"Control" = "#888888")) +
labs(title = "NMDS (Bray-Curtis), cheese 120d") +
guides(color = guide_legend(order = 1),
shape = guide_legend(order = 2)) +
theme(text = element_text(size = xaxis_tick_label_size),
plot.title = element_text(size = title_size, hjust = 0.5)) +
geom_text(mapping = aes(label = production_day), color = 001,
vjust = 0.5, hjust = 1.75, size = xaxis_tick_label_size/2)
## Warning: Removed 15 rows containing missing values (`geom_text()`).
## Removed 15 rows containing missing values (`geom_text()`).
# Diversity parameters plot----
# Summarize alpha-diversity values in table
metadata <- as(sample_data(cheese_physeq), "data.frame")
richness_table <- estimate_richness(cheese_physeq)
richness_table["Adjunct culture"] <- metadata["AdjunctCulture"]
richness_table["Production day"] <- metadata["production_day"]
# Reorder columns
richness_table <- richness_table[, c(10, 11, 1, 2, 3, 4, 5, 6, 7, 8 , 9)]
richness_table
## Adjunct culture Production day Observed Chao1 se.chao1 ACE
## K301_DNA Control 1 25 25 0.0000000 25.00000
## K303_DNA eRWC.y 1 23 23 0.4890096 NaN
## K304_DNA eRWC.H.y 1 27 27 0.4906534 27.54519
## K305_DNA eRWC.HS.y 1 25 25 0.4898979 25.62109
## K306_DNA eRWC.o 1 24 24 0.0000000 24.00000
## K307_DNA eRWC.H.o 1 28 28 0.4909903 28.34823
## K308_DNA eRWC.HS.o 1 27 27 0.2453267 28.05837
## K309_DNA Control 2 18 18 0.0000000 18.00000
## K311_DNA eRWC.y 2 20 20 0.0000000 20.00000
## K312_DNA eRWC.H.y 2 29 29 0.0000000 29.00000
## K313_DNA eRWC.HS.y 2 24 24 0.0000000 24.00000
## K314_DNA eRWC.o 2 27 27 0.0000000 27.00000
## K315_DNA eRWC.H.o 2 27 27 0.0000000 27.00000
## K316_DNA eRWC.HS.o 2 24 24 0.0000000 24.00000
## se.ACE Shannon Simpson InvSimpson Fisher
## K301_DNA 2.000000 1.523419 0.6485980 2.845743 2.522969
## K303_DNA NaN 1.294502 0.6385605 2.766715 2.136409
## K304_DNA 1.408130 1.461295 0.7130358 3.484755 2.555412
## K305_DNA 1.436585 1.467699 0.7218074 3.594632 2.307830
## K306_DNA 1.620185 1.366634 0.6789691 3.114965 2.203134
## K307_DNA 1.939789 1.491580 0.6977966 3.309029 2.723312
## K308_DNA 1.953896 1.440837 0.6901837 3.227719 2.566512
## K309_DNA 1.581139 1.071967 0.4649089 1.868841 1.687209
## K311_DNA 1.788854 1.195255 0.5412021 2.179609 1.881448
## K312_DNA 1.856953 1.689331 0.7426990 3.886499 2.712114
## K313_DNA 1.620185 1.547118 0.7358553 3.785803 2.204451
## K314_DNA 1.632993 1.322028 0.6130468 2.584292 2.561010
## K315_DNA 1.360828 1.732635 0.7466947 3.947805 2.638525
## K316_DNA 1.825742 1.352408 0.6385323 2.766499 2.293975
# write to excel
write.xlsx(richness_table, '../tables/cheese_richness_table.xlsx', colNames = TRUE, rowNames = TRUE, asTable = FALSE, keepNA = FALSE, na.string = "NaN")
# Beta diversity Permanova
adonis2(distance(cheese_physeq, method="bray") ~ AdjunctCulture, data = metadata, permutations = 9999)
## Permutation test for adonis under reduced model
## Terms added sequentially (first to last)
## Permutation: free
## Number of permutations: 9999
##
## adonis2(formula = distance(cheese_physeq, method = "bray") ~ AdjunctCulture, data = metadata, permutations = 9999)
## Df SumOfSqs R2 F Pr(>F)
## AdjunctCulture 6 0.67595 0.63871 2.0625 0.0365 *
## Residual 7 0.38236 0.36129
## Total 13 1.05830 1.00000
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## Warning: Removed 1 rows containing missing values (`geom_point()`).
# percentage thresholds used for filtering
abund_thresh <- 1.0
sp_res_th <- 0.6
# Taxa agglomeration----
# Differently to Ord. & Div. plots, in bar charts a taxa filter is necessary to show the most abundant species
# Let's agglomerate at the Species level using the tax_glom function from phyloseq
# It requires "Kingdom" to be the first column of our tax_table
# (see probl. solved https://github.com/joey711/phyloseq/issues/1551)
tax_table(cheese_physeqR) <- tax_table(cheese_physeqR)[,c(2:10,1)] # Re-order the columns
# Let's agglomerate at the Species level
# i.e. different ASV belonging to the same species will be agglomerated
# their relative abundance will be summed
# further the data frame is melted to long format for filtering and plotting
ch_physeqR_tax <- cheese_physeqR %>%
tax_glom(taxrank = "Species") %>%
psmelt()
# Select only the natural whey and the enriched raw milk-whey culture samples
ch_physeqR_tax_samples <- ch_physeqR_tax
# create a mapper to replace old lactobacilli names in table
fp = file.path(getwd(), "..", "data", "species_dict.json")
species_dict <- fromJSON(file = fp, simplify = TRUE)
# In order to get groups in the plots add a group category and rename replicates # Cultures only
ch_physeqR_tax_samples <- ch_physeqR_tax_samples %>%
mutate(SubExperiment = paste("Day", substr(SubExperiment, nchar(SubExperiment), nchar(SubExperiment))))
# For plotting it looks nicer when the species names are separated by spaces
ch_physeqR_tax_samples <- ch_physeqR_tax_samples %>%
mutate(Species = str_replace(Species, "_", " ")) %>%
# update new lactobacilli names
mutate(Species = recode_factor(Species, !!!species_dict)) %>%
# change Species to sp. for unknown species names
mutate(Species = str_replace(Species, "Species", "sp."))
# group data frame by Species, calculate mean rel. abundance
ch_means <- ddply(ch_physeqR_tax_samples, ~Species, function(x) c(mean = mean(x$Abundance)))
# get the species names with high and low average relative abundance, respectively
# Species whose rel. abundance is higher than 1% (or threshold [abund_thresh] defined above)
ch_high_ab_sp <- ch_means[ch_means$mean >= abund_thresh, "Species"]
# Species whose rel. abundance is less than 1%
ch_low_ab_sp <- ch_means[ch_means$mean < abund_thresh, "Species"]
# Create two data frames with high and low avg. abundances
ch_physeqR_tax_high <- ch_physeqR_tax_samples[ch_physeqR_tax_samples$Species %in% ch_high_ab_sp,]
ch_physeqR_tax_low <- ch_physeqR_tax_samples[ch_physeqR_tax_samples$Species %in% ch_low_ab_sp,]
low_abund <- ch_physeqR_tax_low
lowabund_str <- paste(c("<", abund_thresh, "% avg. abundance"), collapse = " ")
low_abund$Species <- lowabund_str
ch_physeqR_tax_high_full <- rbind(ch_physeqR_tax_high, low_abund)
# convert Species to a character vector from a factor because R
ch_physeqR_tax_low$Species <- as.character(ch_physeqR_tax_low$Species)
# group data frame by Species, calculate max rel. abundance
ch_maxs <- ddply(ch_physeqR_tax_low, ~Species, function(x) c(max = max(x$Abundance)))
# find Species whose max. abundance is less than 0.6 % [threshold "sp_res_th" defined above]
ch_remainder <- ch_maxs[ch_maxs$max <= sp_res_th,]$Species
# change their name to "Other species"
ch_physeqR_tax_low[ch_physeqR_tax_low$Species %in% ch_remainder,]$Species <- 'Other species'
# create nicely formatted guides with the species names
ordered_species_list <- ordered_species_guides(ch_physeqR_tax_low$Species, "Other species")
ordered_species <- ordered_species_list[[1]]
ordered_sp_labels <- ordered_species_list[[2]]
ha_ordered_species_list <- ordered_species_guides(ch_physeqR_tax_high$Species, lowabund_str)
ha_ordered_species <- ha_ordered_species_list[[1]]
ha_ordered_sp_labels <- ha_ordered_species_list[[2]]
# Colors are set manually
species_colors <- c("#88CCEE", "#CC6677", "#DDCC77", "#117733", "#332288", "#AA4499",
"#44AA99", "#999933", "#882255", "#661100", "#6699CC", "#888888")
# Define font size for both plots here
yaxis_tick_label_size = 10
yaxis_label_size = 10
xaxis_tick_label_size = 12
Xaxis_rot = 45
# sample_ID levels
sampleid_vector <- c("Control.3.1", "eRWC.y.3.1", "eRWC.H.y.3.1",
"eRWC.HS.y.3.1", "eRWC.o.3.1", "eRWC.H.o.3.1",
"eRWC.HS.o.3.1", "Control.3.2", "eRWC.y.3.2",
"eRWC.H.y.3.2", "eRWC.HS.y.3.2", "eRWC.o.3.2",
"eRWC.H.o.3.2", "eRWC.HS.o.3.2")
sample_id_labels = as.vector(sapply(sampleid_vector, FUN = function(X) substr(X, 1, nchar(X) - 4)))
# Define font size for both plots here
yaxis_tick_label_size = 10
yaxis_label_size = 10
xaxis_tick_label_size = 12
Xaxis_rot = 0
df_sorter = c("Species", "sample_subtype")
x_labels = c(".1",".2",".3")
# High abundant species plot
p3 <- draw_ha_stacked_plot(df = ch_physeqR_tax_high_full,
sampleid_vector = sampleid_vector,
x_factor = "AdjunctCulture_ID",
x_labels = sample_id_labels,
species_colors = species_colors,
df_sorter = df_sorter,
wrap_group = c("~SubExperiment"),
Xaxis_rot = 45,
ordered_sp_labels = ha_ordered_sp_labels,
abund_thresh = abund_thresh,
sp_res_th = sp_res_th,
ordered_species = ha_ordered_species
)
# Low abundant species plot
p4 <- draw_la_stacked_plot(df = ch_physeqR_tax_low,
sampleid_vector = sampleid_vector,
x_labels = sample_id_labels,
species_colors = species_colors,
ordered_sp_labels = ordered_sp_labels,
x_factor = "AdjunctCulture_ID",
ordered_species = ordered_species,
wrap_group = c("~SubExperiment"),
Xaxis_rot = 45,
abund_thresh = abund_thresh,
sp_res_th = sp_res_th
)
# define paths explicitly
subset_fp <- file.path(getwd(), "..", "data", "cheese", "pivot")
meta_0d_fp <- file.path(subset_fp, "meta_microbes_0d_data.csv")
micro_0d_fp <- file.path(subset_fp, "piv_microbes_0d_data.csv")
meta_1d_fp <- file.path(subset_fp, "meta_microbes_1d_data.csv")
micro_1d_fp <- file.path(subset_fp, "piv_microbes_1d_data.csv")
meta_chem_1d_fp <- file.path(subset_fp, "meta_chem_1d_data.csv")
chem_1d_fp <- file.path(subset_fp, "piv_chem_1d_data.csv")
meta_60d_fp <- file.path(subset_fp, "meta_microbes_60d_data.csv")
micro_60d_fp <- file.path(subset_fp, "piv_microbes_60d_data.csv")
meta_120d_fp <- file.path(subset_fp, "meta_microbes_120d_data.csv")
micro_120d_fp <- file.path(subset_fp, "piv_microbes_120d_data.csv")
meta_chem_120d_fp <- file.path(subset_fp, "meta_chem_120d_data.csv")
chem_120d_fp <- file.path(subset_fp, "piv_chem_120d_data.csv")
micro_0d <- import_and_select_PCA_data(
micro_0d_fp, meta_0d_fp,
select_samples = c("MilkTreatment", "raw")
)
micro_1d <- import_and_select_PCA_data(
micro_1d_fp, meta_1d_fp,
select_samples = c("MilkTreatment", "raw"),
# remove the entire 304 sample due to missing measurements
rm_samples = c("EH_304_cheese_1d")
)
chem_1d <- import_and_select_PCA_data(
chem_1d_fp, meta_chem_1d_fp,
select_samples = c("MilkTreatment", "raw"),
rm_samples = c("EH_304_cheese_1d"),
## remove some highly correlated features
redundant_f = c("L.Lactic.acid.proportion.of.TLA", "Lactic.acid..total.")
)
# make sure we don't have the meta data twice
meta_len <- length(names(read.csv(meta_chem_1d_fp))) + 1
chem_1d_data <- chem_1d[,c(1, meta_len:length(chem_1d))]
# merge chemical and microbes data
dataset_1d <- merge(micro_1d, chem_1d_data, by = "sampleID")
micro_60d <- import_and_select_PCA_data(
micro_60d_fp, meta_60d_fp,
select_samples = c("MilkTreatment", "raw")
)
micro_120d <- import_and_select_PCA_data(
micro_120d_fp, meta_120d_fp,
select_samples = c("MilkTreatment", "raw")
)
# remove some highly correlated features & features with NAs (Force & Fracture)
hcorr_fc <- c("L.Lactic.acid.proportion.of.TLA",
"Lactic.acid..total.",
"Total.volatile.carboxylic.acids",
"Biogenic.amine",
"Chloride",
"Force.at.33",
"Force.at.fracture",
"Fracture.deformation")
chem_120d <- import_and_select_PCA_data(chem_120d_fp, meta_chem_120d_fp,
select_samples = c("MilkTreatment", "raw"),
redundant_f = hcorr_fc
)
# make sure we don't have the meta data twice
meta_len <- length(names(read.csv(meta_chem_120d_fp))) + 1
chem_120d_data <- chem_120d[,c(1, meta_len:length(chem_120d))]
# merge chemical and microbes data
dataset_120d <- merge(micro_120d, chem_120d_data, by = "sampleID")
sessionInfo()
## R version 4.2.2 Patched (2022-11-10 r83330)
## Platform: x86_64-pc-linux-gnu (64-bit)
## Running under: Ubuntu 20.04.5 LTS
##
## Matrix products: default
## BLAS: /usr/lib/x86_64-linux-gnu/blas/libblas.so.3.9.0
## LAPACK: /usr/lib/x86_64-linux-gnu/lapack/liblapack.so.3.9.0
##
## locale:
## [1] LC_CTYPE=en_US.UTF-8 LC_NUMERIC=C
## [3] LC_TIME=en_US.UTF-8 LC_COLLATE=en_US.UTF-8
## [5] LC_MONETARY=en_US.UTF-8 LC_MESSAGES=en_US.UTF-8
## [7] LC_PAPER=en_US.UTF-8 LC_NAME=C
## [9] LC_ADDRESS=C LC_TELEPHONE=C
## [11] LC_MEASUREMENT=en_US.UTF-8 LC_IDENTIFICATION=C
##
## attached base packages:
## [1] stats graphics grDevices utils datasets methods base
##
## other attached packages:
## [1] missMDA_1.18 phyloseq_1.42.0 rjson_0.2.21
## [4] magrittr_2.0.3 ggtext_0.1.2 vegan_2.6-4
## [7] lattice_0.20-45 permute_0.9-7 BiocManager_1.30.19
## [10] patchwork_1.1.2 FSA_0.9.4 agricolae_1.3-5
## [13] FactoMineR_2.7 factoextra_1.0.7 reshape2_1.4.4
## [16] outliers_0.15 lubridate_1.9.2 forcats_1.0.0
## [19] stringr_1.5.0 dplyr_1.1.0 purrr_1.0.1
## [22] readr_2.1.4 tidyr_1.3.0 tibble_3.2.0
## [25] ggplot2_3.4.1 tidyverse_2.0.0 gridExtra_2.3
## [28] plyr_1.8.8 openxlsx_4.2.5.2
##
## loaded via a namespace (and not attached):
## [1] backports_1.4.1 systemfonts_1.0.4 igraph_1.4.0
## [4] splines_4.2.2 AlgDesign_1.2.1 GenomeInfoDb_1.34.9
## [7] digest_0.6.31 foreach_1.5.2 htmltools_0.5.4
## [10] fansi_1.0.4 doParallel_1.0.17 cluster_2.1.4
## [13] tzdb_0.3.0 Biostrings_2.66.0 timechange_0.2.0
## [16] colorspace_2.1-0 ggrepel_0.9.3 textshaping_0.3.6
## [19] haven_2.5.2 xfun_0.37 crayon_1.5.2
## [22] RCurl_1.98-1.10 jsonlite_1.8.4 survival_3.5-3
## [25] iterators_1.0.14 ape_5.7 glue_1.6.2
## [28] gtable_0.3.1 zlibbioc_1.44.0 emmeans_1.8.4-1
## [31] XVector_0.38.0 questionr_0.7.8 car_3.1-1
## [34] Rhdf5lib_1.20.0 dunn.test_1.3.5 BiocGenerics_0.44.0
## [37] abind_1.4-5 scales_1.2.1 mvtnorm_1.1-3
## [40] DBI_1.1.3 rstatix_0.7.2 miniUI_0.1.1.1
## [43] Rcpp_1.0.10 xtable_1.8-4 gridtext_0.1.5
## [46] flashClust_1.01-2 stats4_4.2.2 DT_0.27
## [49] htmlwidgets_1.6.1 ellipsis_0.3.2 mice_3.15.0
## [52] pkgconfig_2.0.3 farver_2.1.1 multcompView_0.1-8
## [55] sass_0.4.5 utf8_1.2.3 tidyselect_1.2.0
## [58] labeling_0.4.2 rlang_1.0.6 later_1.3.0
## [61] munsell_0.5.0 tools_4.2.2 cachem_1.0.7
## [64] cli_3.6.0 generics_0.1.3 ade4_1.7-22
## [67] broom_1.0.3 evaluate_0.20 biomformat_1.26.0
## [70] fastmap_1.1.1 yaml_2.3.7 ragg_1.2.5
## [73] knitr_1.42 zip_2.2.2 nlme_3.1-162
## [76] mime_0.12 leaps_3.1 xml2_1.3.3
## [79] compiler_4.2.2 rstudioapi_0.14 ggsignif_0.6.4
## [82] klaR_1.7-1 bslib_0.4.2 stringi_1.7.12
## [85] highr_0.10 Matrix_1.5-1 commonmark_1.8.1
## [88] markdown_1.5 multtest_2.54.0 vctrs_0.5.2
## [91] pillar_1.8.1 lifecycle_1.0.3 rhdf5filters_1.10.0
## [94] combinat_0.0-8 jquerylib_0.1.4 estimability_1.4.1
## [97] data.table_1.14.8 bitops_1.0-7 httpuv_1.6.9
## [100] R6_2.5.1 promises_1.2.0.1 IRanges_2.32.0
## [103] codetools_0.2-19 MASS_7.3-58.2 rhdf5_2.42.0
## [106] withr_2.5.0 S4Vectors_0.36.1 GenomeInfoDbData_1.2.9
## [109] mgcv_1.8-41 parallel_4.2.2 hms_1.1.2
## [112] grid_4.2.2 labelled_2.10.0 coda_0.19-4
## [115] rmarkdown_2.20 carData_3.0-5 ggpubr_0.6.0
## [118] scatterplot3d_0.3-42 Biobase_2.58.0 shiny_1.7.4